summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2023-08-06 09:38:16 +0200
committerFernando Sahmkow <fsahmkow27@gmail.com>2023-09-23 23:05:30 +0200
commit282ae8fa51e060e6d4ef026b734aa871b1b9331e (patch)
tree3bc4603b6add0582315dc65544f1986427e4182d
parentQueryCache: Implement dependant queries. (diff)
downloadyuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.gz
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.bz2
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.lz
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.xz
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.zst
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.zip
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h5
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp6
-rw-r--r--src/video_core/engines/puller.cpp6
-rw-r--r--src/video_core/fence_manager.h14
-rw-r--r--src/video_core/query_cache/bank_base.h16
-rw-r--r--src/video_core/query_cache/query_base.h44
-rw-r--r--src/video_core/query_cache/query_cache.h66
-rw-r--r--src/video_core/query_cache/query_cache_base.h8
-rw-r--r--src/video_core/query_cache/query_stream.h22
-rw-r--r--src/video_core/rasterizer_interface.h5
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp203
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp27
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h12
21 files changed, 270 insertions, 214 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f91b7d1e4..9e90c587c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
}
template <class P>
-std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size,
- ObtainBufferSynchronize sync_info,
- ObtainBufferOperation post_op) {
+std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
+ VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
const BufferId buffer_id = FindBuffer(cpu_addr, size);
Buffer& buffer = slot_buffers[buffer_id];
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 9507071e5..c4f6e8d12 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -297,8 +297,8 @@ public:
ObtainBufferOperation post_op);
[[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size,
- ObtainBufferSynchronize sync_info,
- ObtainBufferOperation post_op);
+ ObtainBufferSynchronize sync_info,
+ ObtainBufferOperation post_op);
void FlushCachedWrites();
/// Return true when there are uncommitted buffers to be downloaded
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 922c399e6..46b9c548a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() {
case Regs::ClearReport::ZPassPixelCount:
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
break;
- case Regs::ClearReport::PrimitivesGenerated:
- rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
- break;
- case Regs::ClearReport::VtgPrimitivesOut:
- rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
- break;
default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
break;
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 582738234..8dd34c04a 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
if (op == GpuSemaphoreOperation::WriteLong) {
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_sequence;
- rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
+ rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
+ VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
} else {
do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
void Puller::ProcessSemaphoreRelease() {
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_release;
- rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
+ rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
+ VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
}
void Puller::ProcessSemaphoreAcquire() {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8459a3092..805a89900 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -55,6 +55,9 @@ public:
// Unlike other fences, this one doesn't
void SignalOrdering() {
+ if constexpr (!can_async_check) {
+ TryReleasePendingFences<false>();
+ }
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.AccumulateFlushes();
}
@@ -104,13 +107,9 @@ public:
SignalFence(std::move(func));
}
- void WaitPendingFences(bool force) {
+ void WaitPendingFences([[maybe_unused]] bool force) {
if constexpr (!can_async_check) {
- if (force) {
- TryReleasePendingFences<true>();
- } else {
- TryReleasePendingFences<false>();
- }
+ TryReleasePendingFences<true>();
} else {
if (!force) {
return;
@@ -125,7 +124,8 @@ public:
});
SignalFence(std::move(func));
std::unique_lock lk(wait_mutex);
- wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
+ wait_cv.wait(
+ lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
}
}
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h
index 4246a609d..420927091 100644
--- a/src/video_core/query_cache/bank_base.h
+++ b/src/video_core/query_cache/bank_base.h
@@ -7,21 +7,19 @@
#include <deque>
#include <utility>
-
#include "common/common_types.h"
namespace VideoCommon {
class BankBase {
protected:
- const size_t base_bank_size;
- size_t bank_size;
- std::atomic<size_t> references;
- size_t current_slot;
+ const size_t base_bank_size{};
+ size_t bank_size{};
+ std::atomic<size_t> references{};
+ size_t current_slot{};
public:
- BankBase(size_t bank_size_)
- : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
+ explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {}
virtual ~BankBase() = default;
@@ -58,11 +56,11 @@ public:
bank_size = current_slot;
}
- constexpr bool IsClosed() {
+ bool IsClosed() const {
return current_slot >= bank_size;
}
- bool IsDead() {
+ bool IsDead() const {
return IsClosed() && references == 0;
}
};
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 0ae23af9f..993a13eac 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -9,28 +9,28 @@
namespace VideoCommon {
enum class QueryFlagBits : u32 {
- HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp.
- IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
- IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
- IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
- IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
- IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
- IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
- IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
- IsFence = 1 << 8, ///< Indicates the query is a fence.
- IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment
+ HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
+ IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
+ IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
+ IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
+ IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
+ IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
+ IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
+ IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
+ IsFence = 1 << 8, ///< Indicates the query is a fence.
+ IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
};
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
class QueryBase {
public:
- VAddr guest_address;
- QueryFlagBits flags;
- u64 value;
+ VAddr guest_address{};
+ QueryFlagBits flags{};
+ u64 value{};
protected:
// Default constructor
- QueryBase() : guest_address(0), flags{}, value{} {}
+ QueryBase() = default;
// Parameterized constructor
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
@@ -51,23 +51,21 @@ public:
class HostQueryBase : public QueryBase {
public:
// Default constructor
- HostQueryBase()
- : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
- size_banks{}, start_slot{}, size_slots{} {}
+ HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}
// Parameterized constructor
- HostQueryBase(bool isLong, VAddr address)
+ HostQueryBase(bool has_timestamp, VAddr address)
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
start_slot{}, size_slots{} {
- if (isLong) {
+ if (has_timestamp) {
flags |= QueryFlagBits::HasTimestamp;
}
}
- u32 start_bank_id;
- u32 size_banks;
- size_t start_slot;
- size_t size_slots;
+ u32 start_bank_id{};
+ u32 size_banks{};
+ size_t start_slot{};
+ size_t size_slots{};
};
} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index f1393d5c7..042af053c 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -54,7 +54,7 @@ public:
return new_id;
}
- bool HasPendingSync() override {
+ bool HasPendingSync() const override {
return !pending_sync.empty();
}
@@ -71,8 +71,10 @@ public:
continue;
}
query.flags |= QueryFlagBits::IsHostSynced;
- sync_values.emplace_back(query.guest_address, query.value,
- True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4);
+ sync_values.emplace_back(SyncValuesStruct{
+ .address = query.guest_address,
+ .value = query.value,
+ .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)});
}
pending_sync.clear();
if (sync_values.size() > 0) {
@@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> {
public:
using RuntimeType = typename Traits::RuntimeType;
- StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {}
+ StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_)
+ : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {}
~StubStreamer() override = default;
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
std::optional<u32> subreport = std::nullopt) override {
- size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport);
+ size_t new_id =
+ GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport);
return new_id;
}
+
+private:
+ u32 stub_value;
};
template <typename Traits>
@@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
if (streamers[i]) {
- streamer_mask |= 1ULL << i;
+ streamer_mask |= 1ULL << streamers[i]->GetId();
}
}
}
@@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
QueryCacheBase<Traits>* owner;
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
- Traits::RuntimeType& runtime;
+ RuntimeType& runtime;
Tegra::GPU& gpu;
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
u64 streamer_mask;
@@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
size_t streamer_id = static_cast<size_t>(counter_type);
auto* streamer = impl->streamers[streamer_id];
- if (!streamer) [[unlikely]] {
- if (has_timestamp) {
- u64 timestamp = impl->gpu.GetTicks();
- gpu_memory->Write<u64>(addr + 8, timestamp);
- gpu_memory->Write<u64>(addr, 1ULL);
- } else {
- gpu_memory->Write<u32>(addr, 1U);
- }
- return;
+ if (streamer == nullptr) [[unlikely]] {
+ counter_type = QueryType::Payload;
+ payload = 1U;
+ streamer_id = static_cast<size_t>(counter_type);
+ streamer = impl->streamers[streamer_id];
}
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
if (!cpu_addr_opt) [[unlikely]] {
@@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
impl->runtime.EndHostConditionalRendering();
return false;
}
- /*if (!Settings::IsGPULevelHigh()) {
- impl->runtime.EndHostConditionalRendering();
- return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
- VideoCommon::CacheType::BufferCache |
- VideoCommon::CacheType::QueryCache);
- }*/
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
const GPUVAddr address = regs.render_enable.Address();
switch (mode) {
@@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
// Async downloads
template <typename Traits>
void QueryCacheBase<Traits>::CommitAsyncFlushes() {
+ // Make sure to have the results synced in Host.
+ NotifyWFI();
+
u64 mask{};
{
std::scoped_lock lk(impl->flush_guard);
@@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() {
if (mask == 0) {
return;
}
- impl->ForEachStreamerIn(mask,
- [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); });
+ u64 ran_mask = ~mask;
+ while (mask) {
+ impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
+ u64 dep_mask = streamer->GetDependentMask();
+ if ((dep_mask & ~ran_mask) != 0) {
+ return;
+ }
+ u64 index = streamer->GetId();
+ ran_mask |= (1ULL << index);
+ mask &= ~(1ULL << index);
+ streamer->PushUnsyncedQueries();
+ });
+ }
}
template <typename Traits>
@@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
if (mask == 0) {
return;
}
- u64 ran_mask = 0;
- u64 next_phase = 0;
+ u64 ran_mask = ~mask;
while (mask) {
- impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) {
+ impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
u64 dep_mask = streamer->GetDependenceMask();
if ((dep_mask & ~ran_mask) != 0) {
- next_phase |= dep_mask;
return;
}
u64 index = streamer->GetId();
@@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
mask &= ~(1ULL << index);
streamer->PopUnsyncedQueries();
});
- ran_mask |= next_phase;
}
}
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
index 55f508dd1..07be421c6 100644
--- a/src/video_core/query_cache/query_cache_base.h
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -47,7 +47,7 @@ public:
BitField<0, 27, u32> query_id;
u32 raw;
- std::pair<size_t, size_t> unpack() {
+ std::pair<size_t, size_t> unpack() const {
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
}
};
@@ -73,7 +73,7 @@ public:
}
}
- static u64 BuildMask(std::span<QueryType> types) {
+ static u64 BuildMask(std::span<const QueryType> types) {
u64 mask = 0;
for (auto query_type : types) {
mask |= 1ULL << (static_cast<u64>(query_type));
@@ -160,7 +160,7 @@ protected:
}
}
- using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
+ using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
void InvalidateQuery(QueryLocation location);
bool IsQueryDirty(QueryLocation location);
@@ -175,7 +175,7 @@ protected:
friend struct QueryCacheBaseImpl;
friend RuntimeType;
- std::unique_ptr<struct QueryCacheBaseImpl> impl;
+ std::unique_ptr<QueryCacheBaseImpl> impl;
};
} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
index 0e9275565..e7aac955b 100644
--- a/src/video_core/query_cache/query_stream.h
+++ b/src/video_core/query_cache/query_stream.h
@@ -16,7 +16,7 @@ namespace VideoCommon {
class StreamerInterface {
public:
- StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {}
+ explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {}
virtual ~StreamerInterface() = default;
virtual QueryBase* GetQuery(size_t id) = 0;
@@ -37,7 +37,7 @@ public:
/* Do Nothing */
}
- virtual bool HasPendingSync() {
+ virtual bool HasPendingSync() const {
return false;
}
@@ -52,7 +52,7 @@ public:
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
std::optional<u32> subreport = std::nullopt) = 0;
- virtual bool HasUnsyncedQueries() {
+ virtual bool HasUnsyncedQueries() const {
return false;
}
@@ -71,18 +71,28 @@ public:
}
u64 GetDependenceMask() const {
- return dependance_mask;
+ return dependence_mask;
+ }
+
+ u64 GetDependentMask() const {
+ return dependent_mask; // one bit per streamer id that called MakeDependent() on this one
}
protected:
+ void MakeDependent(StreamerInterface* depend_on) {
+ dependence_mask |= 1ULL << depend_on->id;
+ depend_on->dependent_mask |= 1ULL << id;
+ }
+
const size_t id;
- const u64 dependance_mask;
+ u64 dependence_mask;
+ u64 dependent_mask;
};
template <typename QueryType>
class SimpleStreamer : public StreamerInterface {
public:
- SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {}
+ explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
virtual ~SimpleStreamer() = default;
protected:
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 2ba7cbb0d..af1469147 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,10 +9,10 @@
#include <utility>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
-#include "video_core/query_cache/types.h"
#include "video_core/cache_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
+#include "video_core/query_cache/types.h"
#include "video_core/rasterizer_download_area.h"
namespace Tegra {
@@ -57,7 +57,8 @@ public:
virtual void ResetCounter(VideoCommon::QueryType type) = 0;
/// Records a GPU query and caches it
- virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
+ virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
/// Signal an uniform buffer binding
virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 57a8c4c85..23001eeb8 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -43,7 +43,8 @@ public:
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override;
- void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
+ void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a975bbe75..27e2de1bf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
- std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)
- ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt };
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
} else {
@@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
}
return;
}
- if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
- u64 ticks = gpu.GetTicks();
- gpu_memory->Write<u64>(gpu_addr + 8, ticks);
- gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload));
- } else {
- gpu_memory->Write<u32>(gpu_addr, payload);
+ if (type != VideoCommon::QueryType::Payload) {
+ payload = 1u;
+ }
+ std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() {
+ if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
+ u64 ticks = gpu.GetTicks();
+ memory_manager->Write<u64>(gpu_addr + 8, ticks);
+ memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload));
+ } else {
+ memory_manager->Write<u32>(gpu_addr, payload);
+ }
+ });
+ if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) {
+ SignalFence(std::move(func));
+ return;
}
+ func();
}
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 05e048e15..ceffe1f1e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -87,7 +87,8 @@ public:
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override;
- void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
+ void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 97cd4521d..039dc95e1 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
return {staging.buffer, staging.offset};
}
-ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_,
- Scheduler& scheduler_,
- DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
+ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
+ const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
+ ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 14fc5ad71..336573574 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,8 +7,8 @@
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
namespace Core {
class System;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ef891e26b..add0c6fb3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -11,11 +11,9 @@
#include <utility>
#include <vector>
-#include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_set.hpp>
-
#include "common/common_types.h"
#include "core/memory.h"
+#include "video_core/engines/draw_manager.h"
#include "video_core/query_cache/query_cache.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -30,6 +28,7 @@
namespace Vulkan {
+using Tegra::Engines::Maxwell3D;
using VideoCommon::QueryType;
namespace {
@@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase {
public:
static constexpr size_t BANK_SIZE = 256;
static constexpr size_t QUERY_SIZE = 8;
- SamplesQueryBank(const Device& device_, size_t index_)
+ explicit SamplesQueryBank(const Device& device_, size_t index_)
: BankBase(BANK_SIZE), device{device_}, index{index_} {
const auto& dev = device.GetLogical();
query_pool = dev.CreateQueryPool({
@@ -109,18 +108,19 @@ struct HostSyncValues {
static constexpr bool GeneratesBaseBuffer = false;
};
-template <typename Traits>
class SamplesStreamer : public BaseStreamer {
public:
- SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
- Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
- : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
+ explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
+ Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
+ : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_} {
BuildResolveBuffer();
current_bank = nullptr;
current_query = nullptr;
}
+ ~SamplesStreamer() = default;
+
void StartCounter() override {
if (has_started) {
return;
@@ -157,7 +157,7 @@ public:
PauseCounter();
}
- bool HasPendingSync() override {
+ bool HasPendingSync() const override {
return !pending_sync.empty();
}
@@ -198,7 +198,7 @@ public:
}
resolve_slots_remaining = resolve_slots;
sync_values_stash.emplace_back();
- sync_values = sync_values = &sync_values_stash.back();
+ sync_values = &sync_values_stash.back();
sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
}
resolve_slots_remaining--;
@@ -207,6 +207,7 @@ public:
const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
(resolve_slots - resolve_slots_remaining - 1);
VkQueryPool query_pool = bank->GetInnerPool();
+ scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([start, amount, base_offset, query_pool,
buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
@@ -284,7 +285,7 @@ public:
return index;
}
- bool HasUnsyncedQueries() override {
+ bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty();
}
@@ -348,8 +349,8 @@ private:
for (auto q : queries) {
auto* query = GetQuery(q);
ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
- auto id = bank->GetIndex();
- auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(),
+ auto id_ = bank->GetIndex();
+ auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::min());
auto& current_pair = pair.first->second;
current_pair.first = std::min(current_pair.first, start);
@@ -434,13 +435,14 @@ private:
.pNext = nullptr,
.flags = 0,
.size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
- .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
resolve_buffers.emplace_back(
- std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)));
+ memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
}
static constexpr size_t resolve_slots = 8;
@@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase {
public:
static constexpr size_t BANK_SIZE = 1024;
static constexpr size_t QUERY_SIZE = 4;
- TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_)
+ explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator,
+ size_t index_)
: BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -525,22 +528,21 @@ private:
vk::Buffer buffer;
};
-template <typename Traits>
class PrimitivesSucceededStreamer;
-template <typename Traits>
class TFBCounterStreamer : public BaseStreamer {
public:
- TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
- Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
- StagingBufferPool& staging_pool_)
- : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
+ explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
+ Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
+ StagingBufferPool& staging_pool_)
+ : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
buffers_count = 0;
current_bank = nullptr;
counter_buffers.fill(VK_NULL_HANDLE);
offsets.fill(0);
last_queries.fill(0);
+ last_queries_stride.fill(1);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -564,6 +566,8 @@ public:
}
}
+ ~TFBCounterStreamer() = default;
+
void StartCounter() override {
FlushBeginTFB();
has_started = true;
@@ -581,15 +585,15 @@ public:
if (has_flushed_end_pending) {
FlushEndTFB();
}
- runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
- if (regs.transform_feedback_enabled == 0) {
+ runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
+ if (maxwell3d.regs.transform_feedback_enabled == 0) {
streams_mask = 0;
has_started = false;
}
});
}
- bool HasPendingSync() override {
+ bool HasPendingSync() const override {
return !pending_sync.empty();
}
@@ -650,14 +654,19 @@ public:
return index;
}
- std::optional<VAddr> GetLastQueryStream(size_t stream) {
+ std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) {
if (last_queries[stream] != 0) {
- return {last_queries[stream]};
+ std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
+ return result;
}
return std::nullopt;
}
- bool HasUnsyncedQueries() override {
+ Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const {
+ return out_topology;
+ }
+
+ bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty();
}
@@ -762,15 +771,17 @@ private:
void UpdateBuffers() {
last_queries.fill(0);
- runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
+ last_queries_stride.fill(1);
+ runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
buffers_count = 0;
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
- i++) {
- const auto& tf = regs.transform_feedback;
+ out_topology = maxwell3d.draw_manager->GetDrawState().topology;
+ for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
+ const auto& tf = maxwell3d.regs.transform_feedback;
if (tf.buffers[i].enable == 0) {
continue;
}
const size_t stream = tf.controls[i].stream;
+ last_queries_stride[stream] = tf.controls[i].stride;
streams_mask |= 1ULL << stream;
buffers_count = std::max<size_t>(buffers_count, stream + 1);
}
@@ -785,7 +796,8 @@ private:
});
current_bank = &bank_pool.GetBank(current_bank_id);
}
- auto [dont_care, slot] = current_bank->Reserve();
+ auto [dont_care, other] = current_bank->Reserve();
+ const size_t slot = other; // workaround to compile bug.
current_bank->AddReference();
static constexpr VkMemoryBarrier READ_BARRIER{
@@ -818,11 +830,9 @@ private:
return {current_bank_id, slot};
}
- template <typename Traits>
friend class PrimitivesSucceededStreamer;
static constexpr size_t NUM_STREAMS = 4;
- static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
QueryCacheRuntime& runtime;
const Device& device;
@@ -851,6 +861,8 @@ private:
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
std::array<VAddr, NUM_STREAMS> last_queries;
+ std::array<size_t, NUM_STREAMS> last_queries_stride;
+ Maxwell3D::Regs::PrimitiveTopology out_topology;
u64 streams_mask;
};
@@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase {
public:
// Default constructor
PrimitivesQueryBase()
- : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
- dependant_index{}, dependant_manage{} {}
+ : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
// Parameterized constructor
- PrimitivesQueryBase(bool is_long, VAddr address)
- : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
- dependant_index{}, dependant_manage{} {
- if (is_long) {
+ PrimitivesQueryBase(bool has_timestamp, VAddr address)
+ : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
+ if (has_timestamp) {
flags |= VideoCommon::QueryFlagBits::HasTimestamp;
}
}
- u64 stride;
- VAddr dependant_address;
- size_t dependant_index;
- bool dependant_manage;
+ u64 stride{};
+ VAddr dependant_address{};
+ Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
+ size_t dependant_index{};
+ bool dependant_manage{};
};
-template <typename Traits>
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
public:
- PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_,
- TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_)
- : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(
- id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)),
- runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {}
+ explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
+ TFBCounterStreamer& tfb_streamer_,
+ Core::Memory::Memory& cpu_memory_)
+ : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
+ tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {
+ MakeDependent(&tfb_streamer);
+ }
+
+ ~PrimitivesSucceededStreamer() = default;
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
std::optional<u32> subreport_) override {
@@ -901,8 +915,11 @@ public:
const size_t subreport = static_cast<size_t>(*subreport_);
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
bool must_manage_dependance = false;
+ new_query->topology = tfb_streamer.GetOutputTopology();
if (dependant_address_opt) {
- new_query->dependant_address = *dependant_address_opt;
+ auto [dep_address, stride] = *dependant_address_opt;
+ new_query->dependant_address = dep_address;
+ new_query->stride = stride;
} else {
new_query->dependant_index =
tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
@@ -917,25 +934,28 @@ public:
}
return index;
}
+ new_query->stride = 1;
+ runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) {
+ for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
+ const auto& tf = maxwell3d.regs.transform_feedback;
+ if (tf.buffers[i].enable == 0) {
+ continue;
+ }
+ if (tf.controls[i].stream != subreport) {
+ continue;
+ }
+ new_query->stride = tf.controls[i].stride;
+ break;
+ }
+ });
}
new_query->dependant_manage = must_manage_dependance;
- runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
- i++) {
- const auto& tf = regs.transform_feedback;
- if (tf.controls[i].stream != subreport) {
- continue;
- }
- new_query->stride = tf.controls[i].stride;
- break;
- }
- });
pending_flush_queries.push_back(index);
return index;
}
- bool HasUnsyncedQueries() override {
+ bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty();
}
@@ -960,22 +980,49 @@ public:
}
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
+ u64 num_vertices = 0;
if (query->dependant_manage) {
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
- query->value = dependant_query->value / query->stride;
+ num_vertices = dependant_query->value / query->stride;
tfb_streamer.Free(query->dependant_index);
} else {
u8* pointer = cpu_memory.GetPointer(query->dependant_address);
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
- query->value = static_cast<u64>(result) / query->stride;
+ num_vertices = static_cast<u64>(result) / query->stride;
}
+ query->value = [&]() -> u64 {
+ switch (query->topology) {
+ case Maxwell3D::Regs::PrimitiveTopology::Points:
+ return num_vertices;
+ case Maxwell3D::Regs::PrimitiveTopology::Lines:
+ return num_vertices / 2;
+ case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
+ return (num_vertices / 2) + 1;
+ case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
+ return num_vertices - 1;
+ case Maxwell3D::Regs::PrimitiveTopology::Patches:
+ case Maxwell3D::Regs::PrimitiveTopology::Triangles:
+ case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
+ return num_vertices / 3;
+ case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
+ case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
+ case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
+ return num_vertices - 2;
+ case Maxwell3D::Regs::PrimitiveTopology::Quads:
+ return num_vertices / 4;
+ case Maxwell3D::Regs::PrimitiveTopology::Polygon:
+ return 1U;
+ default:
+ return num_vertices;
+ }
+ }();
}
}
private:
QueryCacheRuntime& runtime;
- TFBCounterStreamer<QueryCacheParams>& tfb_streamer;
+ TFBCounterStreamer& tfb_streamer;
Core::Memory::Memory& cpu_memory;
// syncing queue
@@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl {
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool),
primitives_succeeded_streamer(
- static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_),
+ static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
+ cpu_memory_),
+ primitives_needed_minus_suceeded_streamer(
+ static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl {
// Streamers
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
- SamplesStreamer<QueryCacheParams> sample_streamer;
- TFBCounterStreamer<QueryCacheParams> tfb_streamer;
- PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer;
+ SamplesStreamer sample_streamer;
+ TFBCounterStreamer tfb_streamer;
+ PrimitivesSucceededStreamer primitives_succeeded_streamer;
+ VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer;
std::vector<std::pair<VAddr, VAddr>> little_cache;
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
@@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl {
bool is_hcr_running;
// maxwell3d
- Tegra::Engines::Maxwell3D* maxwell3d;
+ Maxwell3D* maxwell3d;
};
QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
@@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
}
-void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) {
+void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) {
impl->maxwell3d = maxwell3d;
}
template <typename Func>
void QueryCacheRuntime::View3DRegs(Func&& func) {
- func(impl->maxwell3d->regs);
+ func(*impl->maxwell3d);
}
void QueryCacheRuntime::EndHostConditionalRendering() {
@@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
return &impl->sample_streamer;
case QueryType::StreamingByteCount:
return &impl->tfb_streamer;
+ case QueryType::StreamingPrimitivesNeeded:
+ case QueryType::VtgPrimitivesOut:
case QueryType::StreamingPrimitivesSucceeded:
return &impl->primitives_succeeded_streamer;
+ case QueryType::StreamingPrimitivesNeededMinusSucceeded:
+ return &impl->primitives_needed_minus_suceeded_streamer;
default:
return nullptr;
}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 9ad2929d7..e9a1ea169 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -49,7 +49,8 @@ public:
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
- VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check);
+ VideoCommon::LookupData object_2, bool qc_dirty,
+ bool equal_check);
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
@@ -66,7 +67,7 @@ private:
};
struct QueryCacheParams {
- using RuntimeType = Vulkan::QueryCacheRuntime;
+ using RuntimeType = typename Vulkan::QueryCacheRuntime;
};
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e8862ba04..c7ce7c312 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
query_cache.NotifySegment(true);
-#if ANDROID
- if (Settings::IsGPULevelHigh()) {
- // This is problematic on Android, disable on GPU Normal.
- // query_cache.UpdateCounters();
- }
-#else
- // query_cache.UpdateCounters();
-#endif
-
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() {
query_cache.NotifySegment(true);
-#if ANDROID
- if (Settings::IsGPULevelHigh()) {
- // This is problematic on Android, disable on GPU Normal.
- // query_cache.UpdateCounters();
- }
-#else
- // query_cache.UpdateCounters();
-#endif
-
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork();
gpu_memory->FlushCaching();
-#if ANDROID
- if (Settings::IsGPULevelHigh()) {
- // This is problematic on Android, disable on GPU Normal.
- // query_cache.UpdateCounters();
- }
-#else
- // query_cache.UpdateCounters();
-#endif
-
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ffd44c68d..ad069556c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -85,7 +85,8 @@ public:
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCommon::QueryType type) override;
- void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
+ void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
+ VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
void FlushAll() override;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index c87e5fb07..da03803aa 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -15,9 +15,13 @@
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
-#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+namespace VideoCommon {
+template <typename Trait>
+class QueryCacheBase;
+}
+
namespace Vulkan {
class CommandPool;
@@ -26,6 +30,8 @@ class Framebuffer;
class GraphicsPipeline;
class StateTracker;
+struct QueryCacheParams;
+
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
@@ -63,7 +69,7 @@ public:
void InvalidateState();
/// Assigns the query cache.
- void SetQueryCache(QueryCache& query_cache_) {
+ void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) {
query_cache = &query_cache_;
}
@@ -219,7 +225,7 @@ private:
std::unique_ptr<MasterSemaphore> master_semaphore;
std::unique_ptr<CommandPool> command_pool;
- QueryCache* query_cache = nullptr;
+ VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;